from Spider import PSpider
import re,os,sys
from DataManager import DataManager
import json
#
# Parse the product entries out of a list (search-result) page's HTML
#
def saveItem(html, manager=None):
    """Extract every product entry from a list page and store it.

    Each ``<li class="gl-item" data-sku="...">`` opening tag found in *html*
    yields a SKU. For every SKU the page is searched again with a per-SKU
    pattern, and the markup captured between that opening tag and the
    ``</div>`` immediately preceding the closing ``</li>`` is passed to
    ``manager.saveItem(sku, fragment)``.

    Args:
        html: Text of the list page.
        manager: Object exposing ``saveItem(sku, fragment)``. Defaults to the
            module-level ``dm`` so existing one-argument calls keep working.

    Returns:
        The number of SKU attributes matched in *html*. This includes
        entries whose body could not be extracted and were therefore not
        saved.
    """
    if manager is None:
        # Resolved at call time: ``dm`` is created after this definition.
        manager = dm

    # Raw strings keep ``\d`` / ``\s`` as regex escapes rather than
    # (invalid) string escapes.
    skus = re.findall(r'<li class="gl-item" data-sku="(\d+)"', html)
    for sku in skus:
        print(sku)
        # Only the first occurrence of the entry is needed, so search()
        # replaces findall()[0].
        entry = re.search(
            r'<li class="gl-item" data-sku="' + re.escape(sku)
            + r'"[^>]+>([\s\S]*?)</div>[\s]+</li>',
            html,
        )
        if entry is None:
            continue
        manager.saveItem(sku, entry.group(1))
    return len(skus)


# Search keyword ("手机" means "mobile phone").
key = "手机"

dm = DataManager("items.txt")
s = PSpider()
number = 0

# Common prefix of every list-page URL; on its own it is also the value
# passed as the second argument of the very first request.
search_url = "".join(
    ["https://search.jd.com/s_new.php?keyword=", key, "&enc=utf-8&wq=", key]
)
href = search_url

for p in range(1, 50):
    # ``s`` carries the running count of SKUs matched on earlier pages.
    ajaxhref = "".join(
        [search_url, "&page=", str(p), "&s=", str(number), "&click=0"]
    )
    html = s.request(ajaxhref, href)
    # The URL just fetched becomes the second argument of the next request
    # (presumably the Referer header -- confirm against PSpider.request).
    href = ajaxhref
    number += saveItem(html)

dm.finish()

# Switch into the "comment" directory (unless the current path already
# contains that word) so the per-product comment files are written there.
if os.getcwd().find("comment") == -1:
    comment_dir = sys.path[0] + "/comment"
    # Create the directory on first run; os.chdir() would otherwise raise
    # FileNotFoundError.
    os.makedirs(comment_dir, exist_ok=True)
    os.chdir(comment_dir)

# Fetch comments: for every collected item, append its comments to
# "<SKU>.txt", one comment per line.
for item in dm.items:
    itemId = item["商品SKU"]
    print("comment start:" + str(itemId))
    refer = "https://item.jd.com/" + str(itemId) + ".html"
    # ``with`` guarantees the file is closed even when a request, the JSON
    # parse or the "comments" lookup raises part-way through a product.
    # Explicit UTF-8 keeps writes from failing on comment text that the
    # platform's default encoding cannot represent.
    with open(str(itemId) + ".txt", "a+", encoding="utf-8") as comment_file:
        for p in range(1, 3):
            href = "https://sclub.jd.com/comment/productPageComments.action?productId=" + str(itemId) \
                   + "&score=0&sortType=5&page=" + str(p) + "&pageSize=10&isShadowSku=0&rid=0&fold=1"
            html = s.request(href, refer, "gbk")
            pcs = json.loads(html)
            for c in pcs["comments"]:
                # Each comment is stored as the str() of the parsed object.
                comment_file.write(str(c))
                comment_file.write("\n")
